library(rstan)
## Warning: package 'rstan' was built under R version 4.3.3
## Loading required package: StanHeaders
## Warning: package 'StanHeaders' was built under R version 4.3.3
##
## rstan version 2.32.7 (Stan version 2.32.2)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(rstanarm)
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.3.3
## This is rstanarm version 2.32.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
##
## Attaching package: 'rstanarm'
## The following object is masked from 'package:rstan':
##
## loo
library(bayesplot)
## Warning: package 'bayesplot' was built under R version 4.3.3
## This is bayesplot version 1.12.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
library(tidyr) # for pivot_longer
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:rstan':
##
## extract
library(dplyr) # for %>%
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(truncnorm) # for rnorm with minimum and maximum values
library(loo)
## Warning: package 'loo' was built under R version 4.3.3
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
##
## Attaching package: 'loo'
## The following object is masked from 'package:rstan':
##
## loo
library(lubridate) # for simplifying working with dates
## Warning: package 'lubridate' was built under R version 4.3.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Custom colour palette: nine well-separated hues, one per nation in the
# Mmmec data, chosen because the stock palettes do not distinguish the
# nations clearly enough.
custom_palette <- c(
  "#9F002E", "#B23AEE", "#FF50FF",
  "#FF7F00", "#FFB900", "#00EEEE",
  "#4EEE94", "#458B00", "#4876FF"
)
# Load the dataset
#install.packages("mlmRev")
library(mlmRev)
## Loading required package: lme4
## Warning: package 'lme4' was built under R version 4.3.3
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
# Load the malignant-melanoma mortality data (counties within regions within
# nations) from mlmRev.
data("Mmmec")
# Check for missing values
colSums(is.na(Mmmec))
## nation region county deaths expected uvb
## 0 0 0 0 0 0
# No missing values in any column, so no imputation/filtering is needed.
# Summarize statistics about deaths and uvb overall
summary(Mmmec$deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 8.00 14.50 27.83 31.00 313.00
# 1st quartile = 8 means that 25% of the observations have less than 8 deaths
# 3rd quartile = 31 means that 75% of the observations have less than 31 deaths
# Mean (27.83) far above the median (14.50): the counts are strongly
# right-skewed, with a long tail up to 313.
# Total observed deaths per nation
deaths_by_country <- aggregate(deaths ~ nation, data = Mmmec, sum)
print(deaths_by_country)
## nation deaths
## 1 Belgium 449
## 2 W.Germany 2949
## 3 Denmark 681
## 4 France 1495
## 5 UK 2179
## 6 Italy 1462
## 7 Ireland 67
## 8 Luxembourg 23
## 9 Netherlands 546
# uvb is a centred UVB-exposure measure (mean ~ 0 by construction)
summary(Mmmec$uvb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.900200 -4.158400 -0.886400 0.000204 3.275525 13.359000
# Explorative analysis
# Some histograms of the distribution of deaths and expected deaths
# Only deaths
ggplot(Mmmec, aes(x = deaths)) +
geom_histogram(binwidth = 1, fill = "red", color = "black") +
ggtitle("Distribution of Deaths")

# ggsave() with no plot argument saves the most recently displayed ggplot
ggsave(file="images/distribution_deaths.pdf", width=18,height=10.5)
# Only expected deaths
ggplot(Mmmec, aes(x = expected)) +
geom_histogram(binwidth = 1, fill = "yellow", color = "black") +
ggtitle("Distribution of Expected Deaths")

ggsave(file="images/distribution_expecteddeaths.pdf", width=18,height=10.5)
# Both deaths and expected deaths
# Gather the data into a long format using pivot_longer
# (one row per county per series, with `type` in {deaths, expected})
Mmmec_long <- Mmmec %>%
pivot_longer(cols = c(deaths, expected), names_to = "type", values_to = "value")
# Create an overlapped (position="identity") histogram with transparency (alpha=0.6)
ggplot(Mmmec_long, aes(x = value, fill = type)) +
geom_histogram(binwidth = 1, alpha = 0.6, position = "identity", color = "black") +
ggtitle("Distribution of Deaths and Expected Deaths") +
xlab("Deaths / Expected Deaths") +
scale_fill_manual(values = c("deaths" = "red", "expected" = "yellow"), labels = c("Deaths", "Expected Deaths"))

ggsave(file="images/distribution_deaths_expected.pdf", width=18,height=10.5)
# Some scatter plots of deaths VS UVB exposure
# With a single regression line
ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
geom_smooth(method = "lm", color = "blue") +
scale_colour_manual(values = custom_palette) +
ggtitle("Deaths VS UVB Exposure") +
geom_jitter()
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file="images/scatter_deaths_vs_uvb.pdf", width=9.3,height=7)
## `geom_smooth()` using formula = 'y ~ x'
# With a regression line for each nation
# (colour is mapped to nation in aes(), so geom_smooth fits one line per group)
ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
geom_smooth(method = "lm", se = FALSE) + # Adds a regression line for each nation
scale_colour_manual(values = custom_palette) +
ggtitle("Deaths VS UVB Exposure") +
geom_jitter()
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file="images/scatter_deaths_vs_uvb_regression_by_nation.pdf", width=9.3,height=7)
## `geom_smooth()` using formula = 'y ~ x'
# Or dividing by nation using facet_wrap, so that it is less chaotic
ggplot(Mmmec, aes(x = uvb, y = deaths, color = nation)) +
geom_point() +
geom_smooth(method = "lm", color = "blue") +
facet_wrap(~ nation) +
scale_colour_manual(values = custom_palette) +
ggtitle("Deaths VS UVB Exposure by Nation") +
theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'

ggsave(file="images/scatter_deaths_vs_uvb_by_nation.pdf", width=8,height=7)
## `geom_smooth()` using formula = 'y ~ x'
# Boxplots of deaths by nation
# NOTE(review): fill = custom_palette passes a 9-element vector *outside*
# aes(); this works only because there are exactly 9 nations/boxes -- it
# would break or recycle silently if the factor levels changed.
ggplot(Mmmec, aes(x = nation, y = deaths)) +
geom_boxplot(fill = custom_palette) +
ggtitle("Deaths across counties in nations")

ggsave(file="images/boxplot_deaths_by_nation.pdf", width=8,height=7)
# Generalized linear mixed model with frequentist approach (in particular ML approach)
# Poisson GLMM: log E(deaths) = log(expected) + beta0 + beta_uvb * uvb + u_region + s_nation
# NOTE(review): (1 | region) + (1 | nation) treats the grouping as crossed;
# this is equivalent to nesting only if region codes are unique across
# nations (they appear to be, 1-79) -- confirm against the data.
M1 <- glmer(deaths ~ uvb + (1 | region) + (1 | nation), Mmmec, poisson, offset = log(expected))
# (1 | k) includes varying intercepts for each k
# log(expected) is an offset term to adjust the model to account for the expected number of deaths
summary(M1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: deaths ~ uvb + (1 | region) + (1 | nation)
## Data: Mmmec
## Offset: log(expected)
##
## AIC BIC logLik -2*log(L) df.resid
## 2198.7 2214.2 -1095.3 2190.7 350
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9440 -0.7788 -0.0071 0.6277 3.9102
##
## Random effects:
## Groups Name Variance Std.Dev.
## region (Intercept) 0.04829 0.2198
## nation (Intercept) 0.13708 0.3702
## Number of obs: 354, groups: region, 78; nation, 9
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.06398 0.13351 -0.479 0.6318
## uvb -0.02822 0.01139 -2.478 0.0132 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## uvb 0.185
# Bayes approach relying on HMC sampling from the posterior distribution
# Same formula/offset as the frequentist M1, with rstanarm's default priors
# (see ?prior_summary.stanreg). Runs 4 chains x 2000 iterations by default.
# NOTE(review): no `seed` argument is passed, so the posterior draws are not
# exactly reproducible across runs -- consider stan_glmer(..., seed = 123)
# for consistency with the sampling() call later in this script.
M1.rstanarm <- stan_glmer(deaths ~ uvb + (1 | region) + (1 | nation), Mmmec, poisson, offset = log(expected))
##
## SAMPLING FOR MODEL 'count' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.001306 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 13.06 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 6.543 seconds (Warm-up)
## Chain 1: 5.267 seconds (Sampling)
## Chain 1: 11.81 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'count' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 5.7e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.57 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 6.965 seconds (Warm-up)
## Chain 2: 5.254 seconds (Sampling)
## Chain 2: 12.219 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'count' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 4.7e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.47 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 7.145 seconds (Warm-up)
## Chain 3: 5.21 seconds (Sampling)
## Chain 3: 12.355 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'count' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 5.1e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.51 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 7.386 seconds (Warm-up)
## Chain 4: 5.11 seconds (Sampling)
## Chain 4: 12.496 seconds (Total)
## Chain 4:
# Compact display: posterior medians and MAD-SDs of the fixed effects and
# the group-level standard deviations.
print(M1.rstanarm)
## stan_glmer
## family: poisson [log]
## formula: deaths ~ uvb + (1 | region) + (1 | nation)
## observations: 354
## ------
## Median MAD_SD
## (Intercept) -0.1 0.2
## uvb 0.0 0.0
##
## Error terms:
## Groups Name Std.Dev.
## region (Intercept) 0.23
## nation (Intercept) 0.48
## Num. levels: region 78, nation 9
##
## ------
## * For help interpreting the printed output see ?print.stanreg
## * For info on the priors used see ?prior_summary.stanreg
# Full summary: per-parameter posterior quantiles plus MCMC diagnostics
# (mcse, Rhat, n_eff) for every fixed effect and group-level intercept.
summary(M1.rstanarm)
##
## Model Info:
## function: stan_glmer
## family: poisson [log]
## formula: deaths ~ uvb + (1 | region) + (1 | nation)
## algorithm: sampling
## sample: 4000 (posterior sample size)
## priors: see help('prior_summary')
## observations: 354
## groups: region (78), nation (9)
##
## Estimates:
## mean sd 10% 50% 90%
## (Intercept) -0.1 0.2 -0.3 -0.1 0.1
## uvb 0.0 0.0 0.0 0.0 0.0
## b[(Intercept) region:1] 0.4 0.1 0.2 0.4 0.6
## b[(Intercept) region:2] 0.0 0.1 -0.1 0.0 0.2
## b[(Intercept) region:3] -0.5 0.1 -0.6 -0.5 -0.3
## b[(Intercept) region:4] -0.1 0.1 -0.2 -0.1 0.0
## b[(Intercept) region:5] 0.1 0.1 0.0 0.1 0.2
## b[(Intercept) region:6] 0.4 0.1 0.2 0.4 0.5
## b[(Intercept) region:7] 0.0 0.1 -0.2 0.0 0.2
## b[(Intercept) region:8] 0.3 0.1 0.1 0.3 0.4
## b[(Intercept) region:9] -0.1 0.1 -0.2 -0.1 0.1
## b[(Intercept) region:10] -0.1 0.1 -0.2 -0.1 0.0
## b[(Intercept) region:11] -0.3 0.1 -0.4 -0.3 -0.2
## b[(Intercept) region:12] 0.0 0.1 -0.1 0.0 0.1
## b[(Intercept) region:13] -0.2 0.1 -0.4 -0.2 0.0
## b[(Intercept) region:14] 0.1 0.1 0.0 0.1 0.2
## b[(Intercept) region:15] 0.2 0.1 0.0 0.2 0.4
## b[(Intercept) region:16] 0.1 0.1 -0.1 0.1 0.3
## b[(Intercept) region:17] -0.1 0.1 -0.3 -0.1 0.0
## b[(Intercept) region:18] 0.1 0.1 -0.1 0.1 0.3
## b[(Intercept) region:19] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:20] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:21] -0.2 0.1 -0.4 -0.2 0.0
## b[(Intercept) region:22] 0.0 0.1 -0.1 0.1 0.2
## b[(Intercept) region:23] 0.2 0.1 0.1 0.2 0.3
## b[(Intercept) region:24] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:25] -0.1 0.1 -0.2 -0.1 0.1
## b[(Intercept) region:27] 0.0 0.1 -0.1 0.0 0.2
## b[(Intercept) region:28] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:29] -0.1 0.1 -0.2 -0.1 0.0
## b[(Intercept) region:30] 0.1 0.1 -0.1 0.1 0.3
## b[(Intercept) region:31] 0.1 0.2 -0.1 0.1 0.3
## b[(Intercept) region:32] -0.2 0.1 -0.3 -0.2 0.0
## b[(Intercept) region:33] 0.0 0.1 -0.2 0.0 0.1
## b[(Intercept) region:34] -0.2 0.1 -0.3 -0.2 -0.1
## b[(Intercept) region:35] -0.1 0.1 -0.2 -0.1 0.1
## b[(Intercept) region:36] -0.4 0.1 -0.6 -0.4 -0.2
## b[(Intercept) region:37] 0.0 0.1 -0.2 0.0 0.2
## b[(Intercept) region:38] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:39] 0.1 0.1 0.0 0.1 0.2
## b[(Intercept) region:40] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:41] 0.0 0.1 -0.1 0.0 0.2
## b[(Intercept) region:42] -0.2 0.1 -0.3 -0.2 -0.1
## b[(Intercept) region:43] -0.2 0.1 -0.3 -0.2 -0.1
## b[(Intercept) region:44] 0.3 0.1 0.2 0.3 0.4
## b[(Intercept) region:45] 0.3 0.1 0.1 0.3 0.4
## b[(Intercept) region:46] -0.1 0.1 -0.2 -0.1 0.1
## b[(Intercept) region:47] 0.0 0.1 -0.1 0.0 0.1
## b[(Intercept) region:48] 0.1 0.1 -0.1 0.1 0.2
## b[(Intercept) region:49] -0.1 0.1 -0.2 -0.1 0.0
## b[(Intercept) region:50] -0.2 0.1 -0.4 -0.2 -0.1
## b[(Intercept) region:51] 0.0 0.1 -0.2 0.0 0.2
## b[(Intercept) region:52] -0.1 0.2 -0.3 -0.1 0.1
## b[(Intercept) region:53] -0.4 0.2 -0.6 -0.4 -0.2
## b[(Intercept) region:54] -0.6 0.1 -0.7 -0.6 -0.4
## b[(Intercept) region:55] 0.2 0.1 0.1 0.2 0.3
## b[(Intercept) region:56] 0.4 0.1 0.2 0.4 0.5
## b[(Intercept) region:57] 0.2 0.1 0.0 0.2 0.3
## b[(Intercept) region:58] 0.2 0.1 0.1 0.2 0.4
## b[(Intercept) region:59] 0.0 0.1 -0.1 0.0 0.1
## b[(Intercept) region:60] 0.0 0.1 -0.2 0.0 0.1
## b[(Intercept) region:61] 0.0 0.2 -0.2 0.0 0.2
## b[(Intercept) region:62] 0.3 0.1 0.1 0.3 0.4
## b[(Intercept) region:63] -0.2 0.1 -0.4 -0.2 -0.1
## b[(Intercept) region:64] -0.1 0.1 -0.3 -0.1 0.1
## b[(Intercept) region:65] -0.2 0.1 -0.4 -0.2 0.0
## b[(Intercept) region:66] 0.2 0.1 0.0 0.2 0.3
## b[(Intercept) region:67] 0.1 0.2 -0.1 0.1 0.3
## b[(Intercept) region:68] 0.0 0.2 -0.2 0.0 0.2
## b[(Intercept) region:69] 0.1 0.2 -0.1 0.1 0.4
## b[(Intercept) region:70] 0.2 0.1 0.0 0.2 0.3
## b[(Intercept) region:71] -0.1 0.2 -0.4 -0.1 0.1
## b[(Intercept) region:72] 0.0 0.2 -0.2 0.0 0.2
## b[(Intercept) region:73] 0.0 0.2 -0.3 0.0 0.2
## b[(Intercept) region:74] 0.0 0.2 -0.3 0.0 0.3
## b[(Intercept) region:75] 0.0 0.2 -0.3 0.0 0.3
## b[(Intercept) region:76] -0.1 0.1 -0.3 -0.1 0.1
## b[(Intercept) region:77] 0.0 0.1 -0.2 0.0 0.2
## b[(Intercept) region:78] 0.2 0.1 0.1 0.2 0.4
## b[(Intercept) region:79] -0.1 0.1 -0.3 -0.1 0.0
## b[(Intercept) nation:Belgium] -0.1 0.2 -0.3 -0.1 0.2
## b[(Intercept) nation:W.Germany] 0.5 0.2 0.3 0.5 0.7
## b[(Intercept) nation:Denmark] 0.6 0.2 0.4 0.6 0.9
## b[(Intercept) nation:France] -0.5 0.2 -0.7 -0.5 -0.3
## b[(Intercept) nation:UK] -0.1 0.2 -0.3 -0.1 0.1
## b[(Intercept) nation:Italy] 0.0 0.2 -0.3 0.0 0.3
## b[(Intercept) nation:Ireland] -0.5 0.2 -0.8 -0.5 -0.2
## b[(Intercept) nation:Luxembourg] 0.0 0.3 -0.3 0.0 0.4
## b[(Intercept) nation:Netherlands] 0.1 0.2 -0.2 0.1 0.3
## Sigma[region:(Intercept),(Intercept)] 0.1 0.0 0.0 0.1 0.1
## Sigma[nation:(Intercept),(Intercept)] 0.2 0.2 0.1 0.2 0.4
##
## Fit Diagnostics:
## mean sd 10% 50% 90%
## mean_PPD 27.8 0.4 27.3 27.8 28.3
##
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
##
## MCMC diagnostics
## mcse Rhat n_eff
## (Intercept) 0.0 1.0 1132
## uvb 0.0 1.0 1793
## b[(Intercept) region:1] 0.0 1.0 3496
## b[(Intercept) region:2] 0.0 1.0 2772
## b[(Intercept) region:3] 0.0 1.0 2885
## b[(Intercept) region:4] 0.0 1.0 1671
## b[(Intercept) region:5] 0.0 1.0 1585
## b[(Intercept) region:6] 0.0 1.0 2352
## b[(Intercept) region:7] 0.0 1.0 4713
## b[(Intercept) region:8] 0.0 1.0 2735
## b[(Intercept) region:9] 0.0 1.0 1927
## b[(Intercept) region:10] 0.0 1.0 1793
## b[(Intercept) region:11] 0.0 1.0 1557
## b[(Intercept) region:12] 0.0 1.0 2130
## b[(Intercept) region:13] 0.0 1.0 3902
## b[(Intercept) region:14] 0.0 1.0 2285
## b[(Intercept) region:15] 0.0 1.0 2530
## b[(Intercept) region:16] 0.0 1.0 2595
## b[(Intercept) region:17] 0.0 1.0 2465
## b[(Intercept) region:18] 0.0 1.0 5406
## b[(Intercept) region:19] 0.0 1.0 4226
## b[(Intercept) region:20] 0.0 1.0 5968
## b[(Intercept) region:21] 0.0 1.0 6408
## b[(Intercept) region:22] 0.0 1.0 5847
## b[(Intercept) region:23] 0.0 1.0 4410
## b[(Intercept) region:24] 0.0 1.0 4358
## b[(Intercept) region:25] 0.0 1.0 6760
## b[(Intercept) region:27] 0.0 1.0 6407
## b[(Intercept) region:28] 0.0 1.0 5122
## b[(Intercept) region:29] 0.0 1.0 3586
## b[(Intercept) region:30] 0.0 1.0 4497
## b[(Intercept) region:31] 0.0 1.0 7952
## b[(Intercept) region:32] 0.0 1.0 5685
## b[(Intercept) region:33] 0.0 1.0 4922
## b[(Intercept) region:34] 0.0 1.0 4500
## b[(Intercept) region:35] 0.0 1.0 4731
## b[(Intercept) region:36] 0.0 1.0 5650
## b[(Intercept) region:37] 0.0 1.0 6754
## b[(Intercept) region:38] 0.0 1.0 3021
## b[(Intercept) region:39] 0.0 1.0 3152
## b[(Intercept) region:40] 0.0 1.0 3493
## b[(Intercept) region:41] 0.0 1.0 2759
## b[(Intercept) region:42] 0.0 1.0 3032
## b[(Intercept) region:43] 0.0 1.0 2149
## b[(Intercept) region:44] 0.0 1.0 1715
## b[(Intercept) region:45] 0.0 1.0 2355
## b[(Intercept) region:46] 0.0 1.0 2341
## b[(Intercept) region:47] 0.0 1.0 2295
## b[(Intercept) region:48] 0.0 1.0 3042
## b[(Intercept) region:49] 0.0 1.0 2167
## b[(Intercept) region:50] 0.0 1.0 4527
## b[(Intercept) region:51] 0.0 1.0 6553
## b[(Intercept) region:52] 0.0 1.0 6710
## b[(Intercept) region:53] 0.0 1.0 4171
## b[(Intercept) region:54] 0.0 1.0 4299
## b[(Intercept) region:55] 0.0 1.0 3345
## b[(Intercept) region:56] 0.0 1.0 4770
## b[(Intercept) region:57] 0.0 1.0 4030
## b[(Intercept) region:58] 0.0 1.0 5258
## b[(Intercept) region:59] 0.0 1.0 2330
## b[(Intercept) region:60] 0.0 1.0 5870
## b[(Intercept) region:61] 0.0 1.0 8353
## b[(Intercept) region:62] 0.0 1.0 3113
## b[(Intercept) region:63] 0.0 1.0 4517
## b[(Intercept) region:64] 0.0 1.0 4910
## b[(Intercept) region:65] 0.0 1.0 2993
## b[(Intercept) region:66] 0.0 1.0 4218
## b[(Intercept) region:67] 0.0 1.0 5987
## b[(Intercept) region:68] 0.0 1.0 6836
## b[(Intercept) region:69] 0.0 1.0 7666
## b[(Intercept) region:70] 0.0 1.0 2826
## b[(Intercept) region:71] 0.0 1.0 6139
## b[(Intercept) region:72] 0.0 1.0 5247
## b[(Intercept) region:73] 0.0 1.0 6022
## b[(Intercept) region:74] 0.0 1.0 7826
## b[(Intercept) region:75] 0.0 1.0 5134
## b[(Intercept) region:76] 0.0 1.0 3391
## b[(Intercept) region:77] 0.0 1.0 3109
## b[(Intercept) region:78] 0.0 1.0 2822
## b[(Intercept) region:79] 0.0 1.0 2852
## b[(Intercept) nation:Belgium] 0.0 1.0 1568
## b[(Intercept) nation:W.Germany] 0.0 1.0 1258
## b[(Intercept) nation:Denmark] 0.0 1.0 1527
## b[(Intercept) nation:France] 0.0 1.0 1218
## b[(Intercept) nation:UK] 0.0 1.0 1272
## b[(Intercept) nation:Italy] 0.0 1.0 1070
## b[(Intercept) nation:Ireland] 0.0 1.0 1890
## b[(Intercept) nation:Luxembourg] 0.0 1.0 2785
## b[(Intercept) nation:Netherlands] 0.0 1.0 1459
## Sigma[region:(Intercept),(Intercept)] 0.0 1.0 1393
## Sigma[nation:(Intercept),(Intercept)] 0.0 1.0 1553
## mean_PPD 0.0 1.0 3965
## log-posterior 0.3 1.0 960
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# Extract Leave-One-Out Cross-Validation (PSIS-LOO estimate of out-of-sample
# predictive fit, for model comparison)
loo_M1rs <- loo(M1.rstanarm)
## Warning: Found 11 observations with a pareto_k > 0.7. With this many problematic observations we recommend calling 'kfold' with argument 'K=10' to perform 10-fold cross-validation rather than LOO.
# The warning above means 11 observations are influential enough that the
# importance-sampling approximation is unreliable for them; the printed
# elpd_loo should be treated with caution (kfold would be more robust).
print(loo_M1rs)
##
## Computed from 4000 by 354 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -1066.6 25.6
## p_loo 74.9 8.3
## looic 2133.2 51.2
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.3, 2.3]).
##
## Pareto k diagnostic values:
## Count Pct. Min. ESS
## (-Inf, 0.7] (good) 343 96.9% 252
## (0.7, 1] (bad) 9 2.5% <NA>
## (1, Inf) (very bad) 2 0.6% <NA>
## See help('pareto-k-diagnostic') for details.
# Posterior predictive check of reproduced data
# y_rep: one simulated dataset per posterior draw (rows = draws, cols = obs)
y_rep <- posterior_predict(M1.rstanarm)
# densities: observed outcome density overlaid on replicated densities
pp_check(M1.rstanarm)

ggsave(file="images/densities_M1rstanarm.pdf", width=8,height=7)
# Plot the credible intervals
# Build plotmath labels for every parameter shown by mcmc_intervals(), in the
# intended order: fixed effects, then the 78 region intercepts, then the 9
# nation intercepts, then the two group-level standard deviations.
beta_names <- c(paste0("beta^", c("uvb")), "gl.intercept")
# Region 26 has no data, so it is absent from the fitted model: vectorised
# paste0 over the present ids replaces the original pair of element-by-element
# loops (which grew the vector and needed i-1 index arithmetic past id 26).
region_ids <- setdiff(1:79, 26)
alpha_names <- c(
  paste0("region[", region_ids, "]"),
  "Belgium", "WG", "Denmark", "France", "UK", "Italy",
  "Ireland", "Luxembourg", "Netherlands",
  # parse() below turns these into subscripted sigma symbols
  "sigma[region]", "sigma[nation]"
)
posterior_M1 <- as.matrix(M1.rstanarm)
# NOTE(review): the labels assume mcmc_intervals() orders parameters exactly
# as c(beta_names, alpha_names) -- verify the y-axis against the fit.
mcmc_intervals(posterior_M1, regex_pars=c( "uvb",
"(Intercept)", "b"))+
xaxis_text(on =TRUE, size=rel(1.9))+
yaxis_text(on =TRUE, size=rel(1.4))+
scale_y_discrete(labels = ((parse(text= c(beta_names, alpha_names)))))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggsave(file="images/logistic_credible_intervals.pdf", width=5, height=length(alpha_names) * 0.25)
# Plot the posterior marginal densities along with 50% intervals for the ‘fixed-effects’ uvb
mcmc_areas(posterior_M1, regex_pars = c("uvb"))+
xaxis_text(on =TRUE, size=rel(1.9))+
yaxis_text(on =TRUE, size=rel(1.4))

ggsave(file = "images/logistic_fixed_effects.pdf", width=8, height=7)
# Plot the random effects (posterior mean +- s.e.) for the region intercepts,
# ordered from smallest to largest.
region_sort <- sort(coef(M1.rstanarm)$region[, 1], index.return = TRUE)
int_ord <- region_sort$x   # ordered intercepts
ord <- region_sort$ix      # permutation, used to reorder labels and s.e.
region.abbr <- levels(Mmmec$region)
region.abbr.ord <- region.abbr[ord]
# NOTE(review): assumes M1.rstanarm$ses is aligned element-wise with the rows
# of coef()$region -- confirm, since ses may also carry fixed-effect entries.
se_ord <- M1.rstanarm$ses[ord]
par(xaxt = "n", mfrow = c(1, 1), mar = c(5, 2, 2, 1))
plot(seq_along(int_ord), int_ord, ylim = c(-1.4, 1.4), pch = 19, bg = 2, xlab = "Regions",
     ylab = "Intercepts", cex.main = 1.9, cex.lab = 1.9)
for (h in seq_along(int_ord)) {
  # vertical +- s.e. bar for region h
  segments(h, int_ord[h] - se_ord[h], h, int_ord[h] + se_ord[h], col = "red")
  # Alternate labels above/below the bar to reduce overplotting
  # (replaces the is.wholenumber helper that was redefined on every iteration)
  if (h %% 2 == 0) {
    text(h, int_ord[h] + se_ord[h] + 0.1, region.abbr.ord[h], cex = 1.1)
  } else {
    text(h, int_ord[h] - se_ord[h] - 0.1, region.abbr.ord[h], cex = 1.1)
  }
}

# Bug fix: ggsave() saves the last *ggplot*, so it silently wrote the previous
# ggplot figure here instead of this base-graphics plot. dev.copy2pdf() copies
# the current graphics device to the pdf file.
dev.copy2pdf(file = "images/random_effects_log.pdf", height = 7, width = length(int_ord) * 0.4)
# empirical distribution function
# Observed ECDF of deaths overlaid on the ECDFs of 200 replicated datasets
ppc_ecdf_overlay(y = M1.rstanarm$y, y_rep[1:200,])+
xaxis_text(on =TRUE, size=22)+
legend_text(size=rel(4))

ggsave(file="images/ecdf_M1.rstanarm.pdf", width=8, height=7)
# proportion of zero
# Test statistic: share of zero counts; ppc_stat looks "prop_zero" up by name
prop_zero <- function(x) mean(x == 0)
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat = "prop_zero")+
xaxis_text(on =TRUE, size=22)+
yaxis_text(on =TRUE, size=22)+
legend_text(size=rel(4))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/proportion_zero_M1.rstanarm.pdf", width=8, height=7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# statistics
# Compare observed mean/sd/median/max of deaths with their replicated
# distributions (classic posterior-predictive test statistics)
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="mean")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/mean_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="sd")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/sd_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="median")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/median_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(y = M1.rstanarm$y, yrep = y_rep, stat="max")+
xaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text( size=22))+
legend_text(size=rel(1.6))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file="images/max_M1.rstanarm.pdf", width=5, height=5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# standardized residuals
# Under the Poisson model Var(y) ~ E(y), so (y - mu) / sqrt(mu) should mostly
# lie within +-2 if the model fits well.
mean_y_rep <- colMeans(y_rep)
std_resid <- (M1.rstanarm$y - mean_y_rep) / sqrt(mean_y_rep)
# qplot() was deprecated in ggplot2 3.4.0 (it emitted a lifecycle warning
# here), so build the same scatterplot with ggplot() directly.
ggplot(data.frame(mean_y_rep = mean_y_rep, std_resid = std_resid),
       aes(x = mean_y_rep, y = std_resid)) +
  geom_point() +
  hline_at(2) + hline_at(-2) +
  labs(x="Mean of y_rep", y= "Standardized residuals")+
  xaxis_text(on =TRUE, size=22)+
  yaxis_text(on =TRUE, size=22)+
  theme(axis.title.x = element_text(size=16),
        axis.title.y = element_text(size=16))

ggsave(file="images/standardized_residuals_M1.rstanarm.pdf", width =8, height =7)
# predictive intervals
# Posterior predictive intervals per observation, with observed y overlaid.
# NOTE(review): labs() puts "Deaths" on x and "Expected deaths" on y, but
# with x = NULL the x-axis is the observation index and y is deaths --
# confirm the axis labels are the ones intended.
ppc_intervals(
y = M1.rstanarm$y,
yrep = y_rep,
x = NULL # missing data but in the next stan model it is NULL too
) +
labs(x = "Deaths", y = "Expected deaths")+
xaxis_text(on =TRUE, size=22)+
yaxis_text(on =TRUE, size=22)+
theme(axis.title.x = element_text(size=20),
axis.title.y = element_text(size=20))

ggsave(file="images/predictive_intervals_M1.rstanarm.pdf", width =8, height =7)
# Stan model of model A of the paper
# It is a variance components model with UVBI included in the fixed part of the model and
# random terms s_k, u_jk, e_ijk associated respectively with the intercept at level 3, 2, 1
# s_k ~ normal(0, sigma_s)
# u_jk ~ normal(0, sigma_u)
# Create UVB index as in Table III of the paper
# NOTE(review): the rows below must line up with the order of
# unique(Mmmec$nation) for each nation to get its own Table III summaries --
# confirm against the paper's table.
uvbi_params <- data.frame(
  nation = unique(Mmmec$nation),
  mean_UVBI = c(12.70, 12.79, 9.96, 17.18, 10.91, 21.45, 10.54, 13.26, 11.40),
  sd_UVBI = c(0.29, 1.35, 0.38, 2.59, 1.50, 3.51, 0.60, 0.05, 0.47),
  min_UVBI = c(12.17, 10.45, 9.47, 12.92, 6.69, 16.83, 9.64, 13.22, 10.62),
  max_UVBI = c(13.10, 15.15, 10.49, 23.24, 13.46, 28.95, 11.70, 13.31, 11.94)
)
# Join UVBI data to the dataframe Mmmec (one set of UVBI parameters per nation)
Mmmec2 <- left_join(Mmmec, uvbi_params, by = "nation")
# Generate UVBI values for each county from mean_UVBI and sd_UVBI, respecting
# min_UVBI and max_UVBI (truncated normal draws).
# Fix the RNG seed so the simulated covariate is reproducible, consistent
# with seed = 123 used for the Stan sampling below.
set.seed(123)
Mmmec2 <- Mmmec2 %>%
  mutate(UVBI = rtruncnorm(n(), a = min_UVBI, b = max_UVBI, mean = mean_UVBI, sd = sd_UVBI))
# Prepare data for stan model: sizes, outcome, offset, integer group indices
# for nation (k) and region (j), and the simulated UVBI covariate.
stan_data <- list(
  N = nrow(Mmmec2),
  deaths = Mmmec2$deaths,
  expected = Mmmec2$expected,
  K = length(unique(Mmmec2$nation)),
  J = length(unique(Mmmec2$region)),
  k = as.integer(as.factor(Mmmec2$nation)),
  j = as.integer(as.factor(Mmmec2$region)),
  UVBI = Mmmec2$UVBI
)
comp_model_A <- stan_model('poisson_regression.stan')
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 16.0.0 (clang-1600.0.26.6)’
## using SDK: ‘’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## 679 | #include <cmath>
## | ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
# Draw posterior samples for model A (rstan defaults: 4 chains, 2000
# iterations each, half warmup); seed fixed for reproducibility.
# NOTE(review): the transcript below reports low BFMI and low bulk/tail ESS
# warnings — consider more iterations or a reparameterization.
fit_model_A <- sampling(comp_model_A, data = stan_data, seed = 123)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000117 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.17 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 6.414 seconds (Warm-up)
## Chain 1: 3.599 seconds (Sampling)
## Chain 1: 10.013 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 4.2e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.42 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 6.511 seconds (Warm-up)
## Chain 2: 4.635 seconds (Sampling)
## Chain 2: 11.146 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 4.4e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.44 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 6.797 seconds (Warm-up)
## Chain 3: 5.38 seconds (Sampling)
## Chain 3: 12.177 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 4.2e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.42 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 5.784 seconds (Warm-up)
## Chain 4: 5.319 seconds (Sampling)
## Chain 4: 11.103 seconds (Total)
## Chain 4:
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
# Posterior summaries (mean, quantiles, n_eff, Rhat) for the fixed effects
# and the three variance components.
print(fit_model_A, pars = c('beta0','beta1','sigma_s','sigma_u','sigma_e'))
## Inference for Stan model: anon_model.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta0 0.05 0.01 0.20 -0.35 -0.07 0.06 0.18 0.45 775 1.00
## beta1 -0.01 0.00 0.01 -0.02 -0.01 -0.01 0.00 0.01 3702 1.00
## sigma_s 0.49 0.00 0.16 0.27 0.38 0.46 0.56 0.88 1700 1.00
## sigma_u 0.23 0.00 0.03 0.18 0.21 0.23 0.25 0.29 1381 1.00
## sigma_e 0.12 0.00 0.02 0.08 0.10 0.12 0.13 0.16 162 1.02
##
## Samples were drawn using NUTS(diag_e) at Sat Jun 14 18:33:57 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
# Posterior predictive checking ----
# Pull the replicated outcomes (draws x observations) out of the fit.
y_rep_model_A <- as.matrix(fit_model_A, pars = "y_rep")

# Traceplot of the four chains to check that they mix well.
traceplot(
  fit_model_A,
  pars = c("beta0", "beta1", "sigma_s", "sigma_u", "sigma_e")
)

ggsave(file = "images/traceplot.pdf", width = 8, height = 7)
# Overlay the observed density of deaths on 200 replicated datasets.
draws_subset <- y_rep_model_A[1:200, ]
ppc_dens_overlay(y = stan_data$deaths, draws_subset) +
  xaxis_text(on = TRUE, size = 22) +
  legend_text(size = rel(4))

ggsave(file = "images/densities_model_A.pdf", width = 8, height = 7)

# Same comparison on the empirical cumulative distribution scale.
ppc_ecdf_overlay(y = stan_data$deaths, draws_subset) +
  xaxis_text(on = TRUE, size = 22) +
  legend_text(size = rel(4))

ggsave(file = "images/ecdf_model_A.pdf", width = 8, height = 7)
# proportion of zero
# Fraction of entries equal to zero; used as a ppc_stat() test statistic.
prop_zero <- function(x) {
  sum(x == 0) / length(x)
}
# Posterior predictive check on the proportion of zero death counts.
ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat = "prop_zero") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  legend_text(size = rel(4))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave(file = "images/proportion_zero_model_A.pdf", width = 8, height = 7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Posterior predictive checks on summary statistics (mean, sd, median, max).
# The four original ppc_stat() calls differed only in the statistic name and
# the output filename, so they are factored into a single helper.
plot_ppc_stat_model_A <- function(stat_name) {
  # Build the test-statistic plot with the shared theme tweaks.
  p <- ppc_stat(y = stan_data$deaths, yrep = y_rep_model_A, stat = stat_name) +
    xaxis_text(on = TRUE, size = 22) +
    theme(axis.title.x = element_text(size = 22)) +
    legend_text(size = rel(1.6))
  # Top-level autoprinting does not fire inside a function, so print
  # explicitly to reproduce the original on-screen behavior.
  print(p)
  ggsave(
    file = sprintf("images/%s_model_A.pdf", stat_name),
    plot = p, width = 5, height = 5
  )
}

for (stat_name in c("mean", "sd", "median", "max")) {
  plot_ppc_stat_model_A(stat_name)
}
# Standardized (Pearson-style) residuals against the posterior predictive
# mean; the +/- 2 reference lines flag observations the model fits poorly.
# NOTE(review): qplot() is deprecated since ggplot2 3.4.0; replaced with an
# explicit ggplot() call that produces the same scatterplot.
mean_y_rep_model_A <- colMeans(y_rep_model_A)
std_resid <- (stan_data$deaths - mean_y_rep_model_A) / sqrt(mean_y_rep_model_A)
ggplot(
  data.frame(mean_rep = mean_y_rep_model_A, std_resid = std_resid),
  aes(x = mean_rep, y = std_resid)
) +
  geom_point() +
  hline_at(2) + hline_at(-2) +
  labs(x = "Mean of y_rep", y = "Standardized residuals") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  theme(
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16)
  )

ggsave(file = "images/standardized_residuals_model_A.pdf", width = 8, height = 7)
# Predictive intervals for each observation, plotted against UVB exposure.
ppc_intervals(
  y = stan_data$deaths,
  yrep = y_rep_model_A,
  x = stan_data$uvb
) +
  # BUG FIX: x is mapped to stan_data$uvb and y to the observed deaths, but
  # the original labels were x = "Deaths", y = "Expected deaths" — swapped
  # relative to the actual mapping. Relabelled to match the plotted data.
  labs(x = "UVB", y = "Deaths") +
  xaxis_text(on = TRUE, size = 22) +
  yaxis_text(on = TRUE, size = 22) +
  theme(
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20)
  )

ggsave(file = "images/predictive_intervals_model_A.pdf", width = 8, height = 7)
# Leave-one-out cross-validation via PSIS.
# The Stan program stores the pointwise log-likelihood, extracted here.
log_lik_A <- extract_log_lik(fit_model_A)  # merged-chain matrix, unchanged for downstream use
# IMPROVEMENT: supply relative effective sample sizes so the PSIS and MCSE
# estimates do not assume independent draws (the printed loo output warned
# "estimates assume independent draws (r_eff=1)"); see loo::loo() /
# loo::relative_eff().
log_lik_A_chains <- extract_log_lik(fit_model_A, merge_chains = FALSE)
r_eff_A <- relative_eff(exp(log_lik_A_chains))
loo_A <- loo(log_lik_A, r_eff = r_eff_A)
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
# Report elpd_loo / p_loo / looic and the Pareto-k diagnostic table.
print(loo_A)
##
## Computed from 4000 by 354 log-likelihood matrix.
##
## Estimate SE
## elpd_loo -1051.0 20.7
## p_loo 110.2 8.8
## looic 2102.0 41.4
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume independent draws (r_eff=1).
##
## Pareto k diagnostic values:
## Count Pct. Min. ESS
## (-Inf, 0.7] (good) 320 90.4% 87
## (0.7, 1] (bad) 31 8.8% <NA>
## (1, Inf) (very bad) 3 0.8% <NA>
## See help('pareto-k-diagnostic') for details.